1   package org.apache.lucene.codecs.lucene42;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one or more
5    * contributor license agreements.  See the NOTICE file distributed with
6    * this work for additional information regarding copyright ownership.
7    * The ASF licenses this file to You under the Apache License, Version 2.0
8    * (the "License"); you may not use this file except in compliance with
9    * the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  import java.io.Closeable;
21  import java.io.IOException;
22  import java.util.Collection;
23  import java.util.Collections;
24  import java.util.Iterator;
25  import java.util.NoSuchElementException;
26  
27  import org.apache.lucene.codecs.CodecUtil;
28  import org.apache.lucene.codecs.TermVectorsReader;
29  import org.apache.lucene.codecs.compressing.CompressionMode;
30  import org.apache.lucene.codecs.compressing.Decompressor;
31  import org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsIndexReader;
32  import org.apache.lucene.index.CorruptIndexException;
33  import org.apache.lucene.index.DocsAndPositionsEnum;
34  import org.apache.lucene.index.FieldInfo;
35  import org.apache.lucene.index.FieldInfos;
36  import org.apache.lucene.index.Fields;
37  import org.apache.lucene.index.IndexFileNames;
38  import org.apache.lucene.index.PostingsEnum;
39  import org.apache.lucene.index.SegmentInfo;
40  import org.apache.lucene.index.Terms;
41  import org.apache.lucene.index.TermsEnum;
42  import org.apache.lucene.store.AlreadyClosedException;
43  import org.apache.lucene.store.ByteArrayDataInput;
44  import org.apache.lucene.store.ChecksumIndexInput;
45  import org.apache.lucene.store.Directory;
46  import org.apache.lucene.store.IOContext;
47  import org.apache.lucene.store.IndexInput;
48  import org.apache.lucene.util.Accountable;
49  import org.apache.lucene.util.Accountables;
50  import org.apache.lucene.util.ArrayUtil;
51  import org.apache.lucene.util.BytesRef;
52  import org.apache.lucene.util.IOUtils;
53  import org.apache.lucene.util.LongsRef;
54  import org.apache.lucene.util.packed.BlockPackedReaderIterator;
55  import org.apache.lucene.util.packed.PackedInts;
56  
57  /**
58   * 4.2 term vectors reader
59   * @deprecated only for reading old segments
60   */
61  @Deprecated
62  final class Lucene42TermVectorsReader extends TermVectorsReader implements Closeable {
63  
64    private final FieldInfos fieldInfos;
65    final Lucene41StoredFieldsIndexReader indexReader;
66    final IndexInput vectorsStream;
67    private final int version;
68    private final int packedIntsVersion;
69    private final CompressionMode compressionMode;
70    private final Decompressor decompressor;
71    private final int chunkSize;
72    private final int numDocs;
73    private boolean closed;
74    private final BlockPackedReaderIterator reader;
75    
76    static final String VECTORS_EXTENSION = "tvd";
77    static final String VECTORS_INDEX_EXTENSION = "tvx";
78  
79    static final String CODEC_SFX_IDX = "Index";
80    static final String CODEC_SFX_DAT = "Data";
81  
82    static final int VERSION_START = 0;
83    static final int VERSION_CHECKSUM = 1;
84    static final int VERSION_CURRENT = VERSION_CHECKSUM;
85    
86    static final int BLOCK_SIZE = 64;
87  
88    static final int POSITIONS = 0x01;
89    static final int   OFFSETS = 0x02;
90    static final int  PAYLOADS = 0x04;
91    static final int FLAGS_BITS = PackedInts.bitsRequired(POSITIONS | OFFSETS | PAYLOADS);
92  
93    // used by clone
94    private Lucene42TermVectorsReader(Lucene42TermVectorsReader reader) {
95      this.fieldInfos = reader.fieldInfos;
96      this.vectorsStream = reader.vectorsStream.clone();
97      this.indexReader = reader.indexReader.clone();
98      this.packedIntsVersion = reader.packedIntsVersion;
99      this.compressionMode = reader.compressionMode;
100     this.decompressor = reader.decompressor.clone();
101     this.chunkSize = reader.chunkSize;
102     this.numDocs = reader.numDocs;
103     this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, BLOCK_SIZE, 0);
104     this.version = reader.version;
105     this.closed = false;
106   }
107 
  /**
   * Sole constructor. Opens and validates the term vectors index file
   * ({@code .tvx}) and data file ({@code .tvd}) of the given segment, then
   * reads the data file's metadata (packed-ints version and chunk size).
   *
   * @param d directory to open the files from
   * @param si segment to read vectors for
   * @param segmentSuffix suffix used when building the file names
   * @param fn field infos of the segment
   * @param context IO context used to open the inputs
   * @param formatName base codec name; {@link #CODEC_SFX_IDX}/{@link #CODEC_SFX_DAT}
   *                   are appended for the index/data headers
   * @param compressionMode compression mode used to create the decompressor
   * @throws IOException if the files cannot be read or fail validation
   */
  public Lucene42TermVectorsReader(Directory d, SegmentInfo si, String segmentSuffix, FieldInfos fn,
      IOContext context, String formatName, CompressionMode compressionMode) throws IOException {
    this.compressionMode = compressionMode;
    final String segment = si.name;
    boolean success = false;
    fieldInfos = fn;
    numDocs = si.maxDoc();
    ChecksumIndexInput indexStream = null;
    try {
      // Load the index into memory
      final String indexStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION);
      indexStream = d.openChecksumInput(indexStreamFN, context);
      final String codecNameIdx = formatName + CODEC_SFX_IDX;
      version = CodecUtil.checkHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT);
      assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer();
      indexReader = new Lucene41StoredFieldsIndexReader(indexStream, si);

      // the index file is fully consumed here, so its checksum can be verified cheaply
      if (version >= VERSION_CHECKSUM) {
        indexStream.readVLong(); // the end of the data file
        CodecUtil.checkFooter(indexStream);
      } else {
        CodecUtil.checkEOF(indexStream);
      }
      indexStream.close();
      indexStream = null;

      // Open the data file and read metadata
      final String vectorsStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_EXTENSION);
      vectorsStream = d.openInput(vectorsStreamFN, context);
      final String codecNameDat = formatName + CODEC_SFX_DAT;
      int version2 = CodecUtil.checkHeader(vectorsStream, codecNameDat, VERSION_START, VERSION_CURRENT);
      if (version != version2) {
        throw new CorruptIndexException("Version mismatch between stored fields index and data: " + version + " != " + version2, vectorsStream);
      }
      assert CodecUtil.headerLength(codecNameDat) == vectorsStream.getFilePointer();

      long pos = vectorsStream.getFilePointer();
      if (version >= VERSION_CHECKSUM) {
        // NOTE: data file is too costly to verify checksum against all the bytes on open,
        // but for now we at least verify proper structure of the checksum footer: which looks
        // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
        // such as file truncation.
        CodecUtil.retrieveChecksum(vectorsStream);
        vectorsStream.seek(pos); // rewind: retrieveChecksum moved the file pointer to the footer
      }

      packedIntsVersion = vectorsStream.readVInt();
      chunkSize = vectorsStream.readVInt();
      decompressor = compressionMode.newDecompressor();
      this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, BLOCK_SIZE, 0);

      success = true;
    } finally {
      if (!success) {
        // close whatever was opened before the failure, without masking the original exception
        IOUtils.closeWhileHandlingException(this, indexStream);
      }
    }
  }
167 
168   /**
169    * @throws AlreadyClosedException if this TermVectorsReader is closed
170    */
171   private void ensureOpen() throws AlreadyClosedException {
172     if (closed) {
173       throw new AlreadyClosedException("this FieldsReader is closed");
174     }
175   }
176 
177   @Override
178   public void close() throws IOException {
179     if (!closed) {
180       IOUtils.close(vectorsStream);
181       closed = true;
182     }
183   }
184 
185   @Override
186   public TermVectorsReader clone() {
187     return new Lucene42TermVectorsReader(this);
188   }
189 
  /**
   * Returns the term vectors of document {@code doc}, or {@code null} if the
   * document has no vectors. The whole metadata of the enclosing chunk (field
   * numbers, flags, term counts, term lengths, frequencies, positions,
   * offsets, payload lengths) is decoded from the stream in a fixed order,
   * but only the bytes belonging to the requested document are decompressed.
   */
  @Override
  public Fields get(int doc) throws IOException {
    ensureOpen();

    // seek to the right place
    {
      final long startPointer = indexReader.getStartPointer(doc);
      vectorsStream.seek(startPointer);
    }

    // decode
    // - docBase: first doc ID of the chunk
    // - chunkDocs: number of docs of the chunk
    final int docBase = vectorsStream.readVInt();
    final int chunkDocs = vectorsStream.readVInt();
    if (doc < docBase || doc >= docBase + chunkDocs || docBase + chunkDocs > numDocs) {
      throw new CorruptIndexException("docBase=" + docBase + ",chunkDocs=" + chunkDocs + ",doc=" + doc, vectorsStream);
    }

    final int skip; // number of fields to skip
    final int numFields; // number of fields of the document we're looking for
    final int totalFields; // total number of fields of the chunk (sum for all docs)
    if (chunkDocs == 1) {
      skip = 0;
      numFields = totalFields = vectorsStream.readVInt();
    } else {
      // per-doc field counts are block-packed: sum the counts of the docs
      // before `doc` to know how many fields precede ours in the chunk
      reader.reset(vectorsStream, chunkDocs);
      int sum = 0;
      for (int i = docBase; i < doc; ++i) {
        sum += reader.next();
      }
      skip = sum;
      numFields = (int) reader.next();
      sum += numFields;
      for (int i = doc + 1; i < docBase + chunkDocs; ++i) {
        sum += reader.next();
      }
      totalFields = sum;
    }

    if (numFields == 0) {
      // no vectors
      return null;
    }

    // read field numbers that have term vectors
    final int[] fieldNums;
    {
      final int token = vectorsStream.readByte() & 0xFF;
      assert token != 0; // means no term vectors, cannot happen since we checked for numFields == 0
      final int bitsPerFieldNum = token & 0x1F; // low 5 bits: bits per packed field number
      int totalDistinctFields = token >>> 5;    // high 3 bits: distinct field count - 1 (0x07 = overflow)
      if (totalDistinctFields == 0x07) {
        totalDistinctFields += vectorsStream.readVInt();
      }
      ++totalDistinctFields;
      final PackedInts.ReaderIterator it = PackedInts.getReaderIteratorNoHeader(vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, totalDistinctFields, bitsPerFieldNum, 1);
      fieldNums = new int[totalDistinctFields];
      for (int i = 0; i < totalDistinctFields; ++i) {
        fieldNums[i] = (int) it.next();
      }
    }

    // read field numbers and flags
    final int[] fieldNumOffs = new int[numFields];
    final PackedInts.Reader flags;
    {
      final int bitsPerOff = PackedInts.bitsRequired(fieldNums.length - 1);
      final PackedInts.Reader allFieldNumOffs = PackedInts.getReaderNoHeader(vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, totalFields, bitsPerOff);
      switch (vectorsStream.readVInt()) {
        case 0:
          // flags stored once per distinct field number: expand to one entry
          // per field occurrence in the chunk
          final PackedInts.Reader fieldFlags = PackedInts.getReaderNoHeader(vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, fieldNums.length, FLAGS_BITS);
          PackedInts.Mutable f = PackedInts.getMutable(totalFields, FLAGS_BITS, PackedInts.COMPACT);
          for (int i = 0; i < totalFields; ++i) {
            final int fieldNumOff = (int) allFieldNumOffs.get(i);
            assert fieldNumOff >= 0 && fieldNumOff < fieldNums.length;
            final int fgs = (int) fieldFlags.get(fieldNumOff);
            f.set(i, fgs);
          }
          flags = f;
          break;
        case 1:
          // flags stored per field occurrence, read directly
          flags = PackedInts.getReaderNoHeader(vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, totalFields, FLAGS_BITS);
          break;
        default:
          throw new AssertionError();
      }
      for (int i = 0; i < numFields; ++i) {
        fieldNumOffs[i] = (int) allFieldNumOffs.get(skip + i);
      }
    }

    // number of terms per field for all fields
    final PackedInts.Reader numTerms;
    final int totalTerms;
    {
      final int bitsRequired = vectorsStream.readVInt();
      numTerms = PackedInts.getReaderNoHeader(vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, totalFields, bitsRequired);
      int sum = 0;
      for (int i = 0; i < totalFields; ++i) {
        sum += numTerms.get(i);
      }
      totalTerms = sum;
    }

    // term lengths
    // docOff/docLen: offset and length of this doc's term bytes within the chunk;
    // totalLen: total term bytes of the whole chunk
    int docOff = 0, docLen = 0, totalLen;
    final int[] fieldLengths = new int[numFields];
    final int[][] prefixLengths = new int[numFields][];
    final int[][] suffixLengths = new int[numFields][];
    {
      reader.reset(vectorsStream, totalTerms);
      // skip the prefix lengths of the fields before ours
      int toSkip = 0;
      for (int i = 0; i < skip; ++i) {
        toSkip += numTerms.get(i);
      }
      reader.skip(toSkip);
      // read prefix lengths
      for (int i = 0; i < numFields; ++i) {
        final int termCount = (int) numTerms.get(skip + i);
        final int[] fieldPrefixLengths = new int[termCount];
        prefixLengths[i] = fieldPrefixLengths;
        for (int j = 0; j < termCount; ) {
          // bulk-read to avoid one call per value
          final LongsRef next = reader.next(termCount - j);
          for (int k = 0; k < next.length; ++k) {
            fieldPrefixLengths[j++] = (int) next.longs[next.offset + k];
          }
        }
      }
      reader.skip(totalTerms - reader.ord());

      reader.reset(vectorsStream, totalTerms);
      // skip the suffix lengths of the fields before ours, accumulating docOff
      toSkip = 0;
      for (int i = 0; i < skip; ++i) {
        for (int j = 0; j < numTerms.get(i); ++j) {
          docOff += reader.next();
        }
      }
      for (int i = 0; i < numFields; ++i) {
        final int termCount = (int) numTerms.get(skip + i);
        final int[] fieldSuffixLengths = new int[termCount];
        suffixLengths[i] = fieldSuffixLengths;
        for (int j = 0; j < termCount; ) {
          final LongsRef next = reader.next(termCount - j);
          for (int k = 0; k < next.length; ++k) {
            fieldSuffixLengths[j++] = (int) next.longs[next.offset + k];
          }
        }
        fieldLengths[i] = sum(suffixLengths[i]);
        docLen += fieldLengths[i];
      }
      totalLen = docOff + docLen;
      // consume the suffix lengths of the fields after ours to compute totalLen
      for (int i = skip + numFields; i < totalFields; ++i) {
        for (int j = 0; j < numTerms.get(i); ++j) {
          totalLen += reader.next();
        }
      }
    }

    // term freqs (stored as freq - 1)
    final int[] termFreqs = new int[totalTerms];
    {
      reader.reset(vectorsStream, totalTerms);
      for (int i = 0; i < totalTerms; ) {
        final LongsRef next = reader.next(totalTerms - i);
        for (int k = 0; k < next.length; ++k) {
          termFreqs[i++] = 1 + (int) next.longs[next.offset + k];
        }
      }
    }

    // total number of positions, offsets and payloads
    int totalPositions = 0, totalOffsets = 0, totalPayloads = 0;
    for (int i = 0, termIndex = 0; i < totalFields; ++i) {
      final int f = (int) flags.get(i);
      final int termCount = (int) numTerms.get(i);
      for (int j = 0; j < termCount; ++j) {
        final int freq = termFreqs[termIndex++];
        if ((f & POSITIONS) != 0) {
          totalPositions += freq;
        }
        if ((f & OFFSETS) != 0) {
          totalOffsets += freq;
        }
        if ((f & PAYLOADS) != 0) {
          totalPayloads += freq;
        }
      }
      assert i != totalFields - 1 || termIndex == totalTerms : termIndex + " " + totalTerms;
    }

    final int[][] positionIndex = positionIndex(skip, numFields, numTerms, termFreqs);
    final int[][] positions, startOffsets, lengths;
    if (totalPositions > 0) {
      positions = readPositions(skip, numFields, flags, numTerms, termFreqs, POSITIONS, totalPositions, positionIndex);
    } else {
      positions = new int[numFields][];
    }

    if (totalOffsets > 0) {
      // average number of chars per term
      final float[] charsPerTerm = new float[fieldNums.length];
      for (int i = 0; i < charsPerTerm.length; ++i) {
        charsPerTerm[i] = Float.intBitsToFloat(vectorsStream.readInt());
      }
      // start offsets and lengths are both gated by the OFFSETS flag
      startOffsets = readPositions(skip, numFields, flags, numTerms, termFreqs, OFFSETS, totalOffsets, positionIndex);
      lengths = readPositions(skip, numFields, flags, numTerms, termFreqs, OFFSETS, totalOffsets, positionIndex);

      for (int i = 0; i < numFields; ++i) {
        final int[] fStartOffsets = startOffsets[i];
        final int[] fPositions = positions[i];
        // patch offsets from positions
        if (fStartOffsets != null && fPositions != null) {
          final float fieldCharsPerTerm = charsPerTerm[fieldNumOffs[i]];
          for (int j = 0; j < startOffsets[i].length; ++j) {
            fStartOffsets[j] += (int) (fieldCharsPerTerm * fPositions[j]);
          }
        }
        if (fStartOffsets != null) {
          final int[] fPrefixLengths = prefixLengths[i];
          final int[] fSuffixLengths = suffixLengths[i];
          final int[] fLengths = lengths[i];
          for (int j = 0, end = (int) numTerms.get(skip + i); j < end; ++j) {
            // delta-decode start offsets and patch lengths using term lengths
            final int termLength = fPrefixLengths[j] + fSuffixLengths[j];
            lengths[i][positionIndex[i][j]] += termLength;
            for (int k = positionIndex[i][j] + 1; k < positionIndex[i][j + 1]; ++k) {
              fStartOffsets[k] += fStartOffsets[k - 1];
              fLengths[k] += termLength;
            }
          }
        }
      }
    } else {
      startOffsets = lengths = new int[numFields][];
    }
    if (totalPositions > 0) {
      // delta-decode positions
      for (int i = 0; i < numFields; ++i) {
        final int[] fPositions = positions[i];
        final int[] fpositionIndex = positionIndex[i];
        if (fPositions != null) {
          for (int j = 0, end = (int) numTerms.get(skip + i); j < end; ++j) {
            // delta-decode start offsets
            for (int k = fpositionIndex[j] + 1; k < fpositionIndex[j + 1]; ++k) {
              fPositions[k] += fPositions[k - 1];
            }
          }
        }
      }
    }

    // payload lengths
    final int[][] payloadIndex = new int[numFields][];
    int totalPayloadLength = 0;
    int payloadOff = 0; // payload bytes of the docs/fields before ours
    int payloadLen = 0; // payload bytes of this doc's fields
    if (totalPayloads > 0) {
      reader.reset(vectorsStream, totalPayloads);
      // skip payload lengths of the fields before ours
      int termIndex = 0;
      for (int i = 0; i < skip; ++i) {
        final int f = (int) flags.get(i);
        final int termCount = (int) numTerms.get(i);
        if ((f & PAYLOADS) != 0) {
          for (int j = 0; j < termCount; ++j) {
            final int freq = termFreqs[termIndex + j];
            for (int k = 0; k < freq; ++k) {
              final int l = (int) reader.next();
              payloadOff += l;
            }
          }
        }
        termIndex += termCount;
      }
      totalPayloadLength = payloadOff;
      // read doc payload lengths
      for (int i = 0; i < numFields; ++i) {
        final int f = (int) flags.get(skip + i);
        final int termCount = (int) numTerms.get(skip + i);
        if ((f & PAYLOADS) != 0) {
          final int totalFreq = positionIndex[i][termCount];
          payloadIndex[i] = new int[totalFreq + 1];
          int posIdx = 0;
          payloadIndex[i][posIdx] = payloadLen;
          for (int j = 0; j < termCount; ++j) {
            final int freq = termFreqs[termIndex + j];
            for (int k = 0; k < freq; ++k) {
              final int payloadLength = (int) reader.next();
              payloadLen += payloadLength;
              payloadIndex[i][posIdx+1] = payloadLen;
              ++posIdx;
            }
          }
          assert posIdx == totalFreq;
        }
        termIndex += termCount;
      }
      totalPayloadLength += payloadLen;
      // consume the payload lengths of the fields after ours
      for (int i = skip + numFields; i < totalFields; ++i) {
        final int f = (int) flags.get(i);
        final int termCount = (int) numTerms.get(i);
        if ((f & PAYLOADS) != 0) {
          for (int j = 0; j < termCount; ++j) {
            final int freq = termFreqs[termIndex + j];
            for (int k = 0; k < freq; ++k) {
              totalPayloadLength += reader.next();
            }
          }
        }
        termIndex += termCount;
      }
      assert termIndex == totalTerms : termIndex + " " + totalTerms;
    }

    // decompress data: only the slice covering this doc's term bytes + payloads
    final BytesRef suffixBytes = new BytesRef();
    decompressor.decompress(vectorsStream, totalLen + totalPayloadLength, docOff + payloadOff, docLen + payloadLen, suffixBytes);
    suffixBytes.length = docLen;
    // payload bytes immediately follow the term bytes of the doc
    final BytesRef payloadBytes = new BytesRef(suffixBytes.bytes, suffixBytes.offset + docLen, payloadLen);

    final int[] fieldFlags = new int[numFields];
    for (int i = 0; i < numFields; ++i) {
      fieldFlags[i] = (int) flags.get(skip + i);
    }

    final int[] fieldNumTerms = new int[numFields];
    for (int i = 0; i < numFields; ++i) {
      fieldNumTerms[i] = (int) numTerms.get(skip + i);
    }

    // slice the chunk-wide term freqs down to this doc's fields
    final int[][] fieldTermFreqs = new int[numFields][];
    {
      int termIdx = 0;
      for (int i = 0; i < skip; ++i) {
        termIdx += numTerms.get(i);
      }
      for (int i = 0; i < numFields; ++i) {
        final int termCount = (int) numTerms.get(skip + i);
        fieldTermFreqs[i] = new int[termCount];
        for (int j = 0; j < termCount; ++j) {
          fieldTermFreqs[i][j] = termFreqs[termIdx++];
        }
      }
    }

    assert sum(fieldLengths) == docLen : sum(fieldLengths) + " != " + docLen;

    return new TVFields(fieldNums, fieldFlags, fieldNumOffs, fieldNumTerms, fieldLengths,
        prefixLengths, suffixLengths, fieldTermFreqs,
        positionIndex, positions, startOffsets, lengths,
        payloadBytes, payloadIndex,
        suffixBytes);
  }
546 
547   // field -> term index -> position index
548   private int[][] positionIndex(int skip, int numFields, PackedInts.Reader numTerms, int[] termFreqs) {
549     final int[][] positionIndex = new int[numFields][];
550     int termIndex = 0;
551     for (int i = 0; i < skip; ++i) {
552       final int termCount = (int) numTerms.get(i);
553       termIndex += termCount;
554     }
555     for (int i = 0; i < numFields; ++i) {
556       final int termCount = (int) numTerms.get(skip + i);
557       positionIndex[i] = new int[termCount + 1];
558       for (int j = 0; j < termCount; ++j) {
559         final int freq = termFreqs[termIndex+j];
560         positionIndex[i][j + 1] = positionIndex[i][j] + freq;
561       }
562       termIndex += termCount;
563     }
564     return positionIndex;
565   }
566 
  /**
   * Reads one block-packed integer stream (positions, start offsets or
   * lengths, selected by {@code flag}) for the {@code numFields} fields of
   * the target document, skipping the values belonging to the first
   * {@code skip} fields of the chunk and consuming the values of the fields
   * that follow the document so the stream ends up positioned after the block.
   *
   * @return one array per field; {@code null} for fields whose flags do not
   *         contain {@code flag}
   */
  private int[][] readPositions(int skip, int numFields, PackedInts.Reader flags, PackedInts.Reader numTerms, int[] termFreqs, int flag, final int totalPositions, int[][] positionIndex) throws IOException {
    final int[][] positions = new int[numFields][];
    reader.reset(vectorsStream, totalPositions);
    // skip the values of the fields before ours (one value per occurrence)
    int toSkip = 0;
    int termIndex = 0;
    for (int i = 0; i < skip; ++i) {
      final int f = (int) flags.get(i);
      final int termCount = (int) numTerms.get(i);
      if ((f & flag) != 0) {
        for (int j = 0; j < termCount; ++j) {
          final int freq = termFreqs[termIndex+j];
          toSkip += freq;
        }
      }
      termIndex += termCount;
    }
    reader.skip(toSkip);
    // read doc positions
    for (int i = 0; i < numFields; ++i) {
      final int f = (int) flags.get(skip + i);
      final int termCount = (int) numTerms.get(skip + i);
      if ((f & flag) != 0) {
        final int totalFreq = positionIndex[i][termCount];
        final int[] fieldPositions = new int[totalFreq];
        positions[i] = fieldPositions;
        for (int j = 0; j < totalFreq; ) {
          // bulk-read to avoid one call per value
          final LongsRef nextPositions = reader.next(totalFreq - j);
          for (int k = 0; k < nextPositions.length; ++k) {
            fieldPositions[j++] = (int) nextPositions.longs[nextPositions.offset + k];
          }
        }
      }
      termIndex += termCount;
    }
    // leave the stream positioned right after this block of data
    reader.skip(totalPositions - reader.ord());
    return positions;
  }
605 
606   private class TVFields extends Fields {
607 
608     private final int[] fieldNums, fieldFlags, fieldNumOffs, numTerms, fieldLengths;
609     private final int[][] prefixLengths, suffixLengths, termFreqs, positionIndex, positions, startOffsets, lengths, payloadIndex;
610     private final BytesRef suffixBytes, payloadBytes;
611 
612     public TVFields(int[] fieldNums, int[] fieldFlags, int[] fieldNumOffs, int[] numTerms, int[] fieldLengths,
613         int[][] prefixLengths, int[][] suffixLengths, int[][] termFreqs,
614         int[][] positionIndex, int[][] positions, int[][] startOffsets, int[][] lengths,
615         BytesRef payloadBytes, int[][] payloadIndex,
616         BytesRef suffixBytes) {
617       this.fieldNums = fieldNums;
618       this.fieldFlags = fieldFlags;
619       this.fieldNumOffs = fieldNumOffs;
620       this.numTerms = numTerms;
621       this.fieldLengths = fieldLengths;
622       this.prefixLengths = prefixLengths;
623       this.suffixLengths = suffixLengths;
624       this.termFreqs = termFreqs;
625       this.positionIndex = positionIndex;
626       this.positions = positions;
627       this.startOffsets = startOffsets;
628       this.lengths = lengths;
629       this.payloadBytes = payloadBytes;
630       this.payloadIndex = payloadIndex;
631       this.suffixBytes = suffixBytes;
632     }
633 
634     @Override
635     public Iterator<String> iterator() {
636       return new Iterator<String>() {
637         int i = 0;
638         @Override
639         public boolean hasNext() {
640           return i < fieldNumOffs.length;
641         }
642         @Override
643         public String next() {
644           if (!hasNext()) {
645             throw new NoSuchElementException();
646           }
647           final int fieldNum = fieldNums[fieldNumOffs[i++]];
648           return fieldInfos.fieldInfo(fieldNum).name;
649         }
650         @Override
651         public void remove() {
652           throw new UnsupportedOperationException();
653         }
654       };
655     }
656 
657     @Override
658     public Terms terms(String field) throws IOException {
659       final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
660       if (fieldInfo == null) {
661         return null;
662       }
663       int idx = -1;
664       for (int i = 0; i < fieldNumOffs.length; ++i) {
665         if (fieldNums[fieldNumOffs[i]] == fieldInfo.number) {
666           idx = i;
667           break;
668         }
669       }
670 
671       if (idx == -1 || numTerms[idx] == 0) {
672         // no term
673         return null;
674       }
675       int fieldOff = 0, fieldLen = -1;
676       for (int i = 0; i < fieldNumOffs.length; ++i) {
677         if (i < idx) {
678           fieldOff += fieldLengths[i];
679         } else {
680           fieldLen = fieldLengths[i];
681           break;
682         }
683       }
684       assert fieldLen >= 0;
685       return new TVTerms(numTerms[idx], fieldFlags[idx],
686           prefixLengths[idx], suffixLengths[idx], termFreqs[idx],
687           positionIndex[idx], positions[idx], startOffsets[idx], lengths[idx],
688           payloadIndex[idx], payloadBytes,
689           new BytesRef(suffixBytes.bytes, suffixBytes.offset + fieldOff, fieldLen));
690     }
691 
692     @Override
693     public int size() {
694       return fieldNumOffs.length;
695     }
696 
697   }
698 
699   private class TVTerms extends Terms {
700 
701     private final int numTerms, flags;
702     private final int[] prefixLengths, suffixLengths, termFreqs, positionIndex, positions, startOffsets, lengths, payloadIndex;
703     private final BytesRef termBytes, payloadBytes;
704 
705     TVTerms(int numTerms, int flags, int[] prefixLengths, int[] suffixLengths, int[] termFreqs,
706         int[] positionIndex, int[] positions, int[] startOffsets, int[] lengths,
707         int[] payloadIndex, BytesRef payloadBytes,
708         BytesRef termBytes) {
709       this.numTerms = numTerms;
710       this.flags = flags;
711       this.prefixLengths = prefixLengths;
712       this.suffixLengths = suffixLengths;
713       this.termFreqs = termFreqs;
714       this.positionIndex = positionIndex;
715       this.positions = positions;
716       this.startOffsets = startOffsets;
717       this.lengths = lengths;
718       this.payloadIndex = payloadIndex;
719       this.payloadBytes = payloadBytes;
720       this.termBytes = termBytes;
721     }
722 
723     @Override
724     public TermsEnum iterator() throws IOException {
725       final TVTermsEnum termsEnum = new TVTermsEnum();
726       termsEnum.reset(numTerms, flags, prefixLengths, suffixLengths, termFreqs, positionIndex, positions, startOffsets, lengths,
727           payloadIndex, payloadBytes,
728           new ByteArrayDataInput(termBytes.bytes, termBytes.offset, termBytes.length));
729       return termsEnum;
730     }
731 
732     @Override
733     public long size() throws IOException {
734       return numTerms;
735     }
736 
737     @Override
738     public long getSumTotalTermFreq() throws IOException {
739       return -1L;
740     }
741 
742     @Override
743     public long getSumDocFreq() throws IOException {
744       return numTerms;
745     }
746 
747     @Override
748     public int getDocCount() throws IOException {
749       return 1;
750     }
751 
752     @Override
753     public boolean hasFreqs() {
754       return true;
755     }
756 
757     @Override
758     public boolean hasOffsets() {
759       return (flags & OFFSETS) != 0;
760     }
761 
762     @Override
763     public boolean hasPositions() {
764       return (flags & POSITIONS) != 0;
765     }
766 
767     @Override
768     public boolean hasPayloads() {
769       return (flags & PAYLOADS) != 0;
770     }
771 
772   }
773 
774   private static class TVTermsEnum extends TermsEnum {
775 
776     private int numTerms, startPos, ord;
777     private int[] prefixLengths, suffixLengths, termFreqs, positionIndex, positions, startOffsets, lengths, payloadIndex;
778     private ByteArrayDataInput in;
779     private BytesRef payloads;
780     private final BytesRef term;
781 
782     private TVTermsEnum() {
783       term = new BytesRef(16);
784     }
785 
786     void reset(int numTerms, int flags, int[] prefixLengths, int[] suffixLengths, int[] termFreqs, int[] positionIndex, int[] positions, int[] startOffsets, int[] lengths,
787         int[] payloadIndex, BytesRef payloads, ByteArrayDataInput in) {
788       this.numTerms = numTerms;
789       this.prefixLengths = prefixLengths;
790       this.suffixLengths = suffixLengths;
791       this.termFreqs = termFreqs;
792       this.positionIndex = positionIndex;
793       this.positions = positions;
794       this.startOffsets = startOffsets;
795       this.lengths = lengths;
796       this.payloadIndex = payloadIndex;
797       this.payloads = payloads;
798       this.in = in;
799       startPos = in.getPosition();
800       reset();
801     }
802 
803     void reset() {
804       term.length = 0;
805       in.setPosition(startPos);
806       ord = -1;
807     }
808 
809     @Override
810     public BytesRef next() throws IOException {
811       if (ord == numTerms - 1) {
812         return null;
813       } else {
814         assert ord < numTerms;
815         ++ord;
816       }
817 
818       // read term
819       term.offset = 0;
820       term.length = prefixLengths[ord] + suffixLengths[ord];
821       if (term.length > term.bytes.length) {
822         term.bytes = ArrayUtil.grow(term.bytes, term.length);
823       }
824       in.readBytes(term.bytes, prefixLengths[ord], suffixLengths[ord]);
825 
826       return term;
827     }
828 
829     @Override
830     public SeekStatus seekCeil(BytesRef text)
831         throws IOException {
832       if (ord < numTerms && ord >= 0) {
833         final int cmp = term().compareTo(text);
834         if (cmp == 0) {
835           return SeekStatus.FOUND;
836         } else if (cmp > 0) {
837           reset();
838         }
839       }
840       // linear scan
841       while (true) {
842         final BytesRef term = next();
843         if (term == null) {
844           return SeekStatus.END;
845         }
846         final int cmp = term.compareTo(text);
847         if (cmp > 0) {
848           return SeekStatus.NOT_FOUND;
849         } else if (cmp == 0) {
850           return SeekStatus.FOUND;
851         }
852       }
853     }
854 
855     @Override
856     public void seekExact(long ord) throws IOException {
857       throw new UnsupportedOperationException();
858     }
859 
860     @Override
861     public BytesRef term() throws IOException {
862       return term;
863     }
864 
865     @Override
866     public long ord() throws IOException {
867       throw new UnsupportedOperationException();
868     }
869 
870     @Override
871     public int docFreq() throws IOException {
872       return 1;
873     }
874 
875     @Override
876     public long totalTermFreq() throws IOException {
877       return termFreqs[ord];
878     }
879 
880     @Override
881     public final PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
882 
883       if (PostingsEnum.featureRequested(flags, DocsAndPositionsEnum.OLD_NULL_SEMANTICS)) {
884         if (positions == null && startOffsets == null) {
885           return null;
886         }
887       }
888 
889       final TVDocsEnum docsEnum;
890       if (reuse != null && reuse instanceof TVDocsEnum) {
891         docsEnum = (TVDocsEnum) reuse;
892       } else {
893         docsEnum = new TVDocsEnum();
894       }
895 
896       docsEnum.reset(termFreqs[ord], positionIndex[ord], positions, startOffsets, lengths, payloads, payloadIndex);
897       return docsEnum;
898     }
899 
900   }
901 
902   private static class TVDocsEnum extends PostingsEnum {
903 
904     private int doc = -1;
905     private int termFreq;
906     private int positionIndex;
907     private int[] positions;
908     private int[] startOffsets;
909     private int[] lengths;
910     private final BytesRef payload;
911     private int[] payloadIndex;
912     private int basePayloadOffset;
913     private int i;
914 
915     TVDocsEnum() {
916       payload = new BytesRef();
917     }
918 
919     public void reset(int freq, int positionIndex, int[] positions,
920         int[] startOffsets, int[] lengths, BytesRef payloads,
921         int[] payloadIndex) {
922       this.termFreq = freq;
923       this.positionIndex = positionIndex;
924       this.positions = positions;
925       this.startOffsets = startOffsets;
926       this.lengths = lengths;
927       this.basePayloadOffset = payloads.offset;
928       this.payload.bytes = payloads.bytes;
929       payload.offset = payload.length = 0;
930       this.payloadIndex = payloadIndex;
931 
932       doc = i = -1;
933     }
934 
935     private void checkDoc() {
936       if (doc == NO_MORE_DOCS) {
937         throw new IllegalStateException("DocsEnum exhausted");
938       } else if (doc == -1) {
939         throw new IllegalStateException("DocsEnum not started");
940       }
941     }
942 
943     private void checkPosition() {
944       checkDoc();
945       if (i < 0) {
946         throw new IllegalStateException("Position enum not started");
947       } else if (i >= termFreq) {
948         throw new IllegalStateException("Read past last position");
949       }
950     }
951 
952     @Override
953     public int nextPosition() throws IOException {
954       if (doc != 0) {
955         throw new IllegalStateException();
956       } else if (i >= termFreq - 1) {
957         throw new IllegalStateException("Read past last position");
958       }
959 
960       ++i;
961 
962       if (payloadIndex != null) {
963         payload.offset = basePayloadOffset + payloadIndex[positionIndex + i];
964         payload.length = payloadIndex[positionIndex + i + 1] - payloadIndex[positionIndex + i];
965       }
966 
967       if (positions == null) {
968         return -1;
969       } else {
970         return positions[positionIndex + i];
971       }
972     }
973 
974     @Override
975     public int startOffset() throws IOException {
976       checkPosition();
977       if (startOffsets == null) {
978         return -1;
979       } else {
980         return startOffsets[positionIndex + i];
981       }
982     }
983 
984     @Override
985     public int endOffset() throws IOException {
986       checkPosition();
987       if (startOffsets == null) {
988         return -1;
989       } else {
990         return startOffsets[positionIndex + i] + lengths[positionIndex + i];
991       }
992     }
993 
994     @Override
995     public BytesRef getPayload() throws IOException {
996       checkPosition();
997       if (payloadIndex == null || payload.length == 0) {
998         return null;
999       } else {
1000         return payload;
1001       }
1002     }
1003 
1004     @Override
1005     public int freq() throws IOException {
1006       checkDoc();
1007       return termFreq;
1008     }
1009 
1010     @Override
1011     public int docID() {
1012       return doc;
1013     }
1014 
1015     @Override
1016     public int nextDoc() throws IOException {
1017       if (doc == -1) {
1018         return (doc = 0);
1019       } else {
1020         return (doc = NO_MORE_DOCS);
1021       }
1022     }
1023 
1024     @Override
1025     public int advance(int target) throws IOException {
1026       return slowAdvance(target);
1027     }
1028 
1029     @Override
1030     public long cost() {
1031       return 1;
1032     }
1033   }
1034 
1035   private static int sum(int[] arr) {
1036     int sum = 0;
1037     for (int el : arr) {
1038       sum += el;
1039     }
1040     return sum;
1041   }
1042 
1043   @Override
1044   public long ramBytesUsed() {
1045     return indexReader.ramBytesUsed();
1046   }
1047   
1048   @Override
1049   public Collection<Accountable> getChildResources() {
1050     return Collections.singleton(Accountables.namedAccountable("term vector index", indexReader));
1051   }
1052   
1053   @Override
1054   public void checkIntegrity() throws IOException {
1055     if (version >= VERSION_CHECKSUM) {
1056       CodecUtil.checksumEntireFile(vectorsStream);
1057     }
1058   }
1059 
1060   @Override
1061   public String toString() {
1062     return getClass().getSimpleName() + "(mode=" + compressionMode + ",chunksize=" + chunkSize + ")";
1063   }
1064 }